{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# Based on this paper - https://apps.dtic.mil/dtic/tr/fulltext/u2/a164453.pdf"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 319,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "from statistics import mean\n",
    "import math\n",
    "\n",
    "data = pd.read_csv(\"../../data/clean_weather.csv\")\n",
    "data = data.ffill()"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 320,
   "outputs": [],
   "source": [
    "PREDICTORS = [\"tmax\", \"tmin\", \"rain\"]\n",
    "TARGET = \"tmax_tomorrow\"\n",
    "\n",
    "scaler = StandardScaler()\n",
    "data[PREDICTORS] = scaler.fit_transform(data[PREDICTORS])\n",
    "\n",
    "np.random.seed(0)\n",
    "split_data = np.split(data, [int(.7*len(data)), int(.85*len(data))])\n",
    "(train_x, train_y), (valid_x, valid_y), (test_x, test_y) = [[d[PREDICTORS].to_numpy(), d[[TARGET]].to_numpy()] for d in split_data]"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 321,
   "outputs": [],
   "source": [
    "# Rnn\n",
    "# Input -> hidden\n",
    "# hidden -> hidden\n",
    "# hidden -> output"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 322,
   "outputs": [],
   "source": [
    "def init_params(layer_conf):\n",
    "    layers = []\n",
    "    for i in range(len(layer_conf)):\n",
    "        if layer_conf[i][\"type\"] == \"input\":\n",
    "            continue\n",
    "        elif layer_conf[i][\"type\"] == \"rnn\":\n",
    "            np.random.seed(0)\n",
    "            k = 1/math.sqrt(layer_conf[i][\"hidden\"])\n",
    "            input_weights = np.random.rand(layer_conf[i-1][\"units\"], layer_conf[i][\"hidden\"]) * 2 * k - k\n",
    "\n",
    "            hidden_weights = np.random.rand(layer_conf[i][\"hidden\"], layer_conf[i][\"hidden\"]) * 2 * k - k\n",
    "            hidden_bias = np.random.rand(1, layer_conf[i][\"hidden\"]) * 2 * k - k\n",
    "\n",
    "            output_weights = np.random.rand(layer_conf[i][\"hidden\"], layer_conf[i][\"output\"]) * 2 * k - k\n",
    "            output_bias = np.random.rand(1, layer_conf[i][\"output\"]) * 2 * k - k\n",
    "\n",
    "            layers.append(\n",
    "                [[input_weights], [hidden_weights, hidden_bias], [output_weights, output_bias]]\n",
    "            )\n",
    "    return layers"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 323,
   "outputs": [],
   "source": [
    "def forward(params, x, layer_conf):\n",
    "    hiddens = []\n",
    "    outputs = []\n",
    "    for i in range(len(params)):\n",
    "        if layer_conf[i+1][\"type\"] == \"rnn\":\n",
    "            [i_weight], [h_weight, h_bias], [o_weight, o_bias] = params[i]\n",
    "            hidden = np.zeros((x.shape[0], i_weight.shape[1]))\n",
    "            output = np.zeros((x.shape[0], o_weight.shape[1]))\n",
    "            for j in range(x.shape[0]):\n",
    "                input_x = x[j,:] @ i_weight\n",
    "                hidden_x = input_x + hidden[max(j-1,0),:] @ h_weight + h_bias\n",
    "                # Activation.  tanh avoids outputs getting larger and larger.\n",
    "                hidden_x = np.tanh(hidden_x)\n",
    "                # Store hidden for use in backprop\n",
    "                hidden[j,:] = hidden_x.copy()\n",
    "\n",
    "                # Activation\n",
    "                output_x = hidden_x @ o_weight + o_bias\n",
    "                output[j,:] = output_x.copy()\n",
    "            hiddens.append(hidden)\n",
    "            outputs.append(output)\n",
    "    return hiddens, outputs"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 324,
   "outputs": [],
   "source": [
    "def mse(actual, predicted):\n",
    "    return np.mean((actual-predicted)**2)\n",
    "\n",
    "def mse_grad(actual, predicted):\n",
    "    return (predicted - actual)"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 325,
   "outputs": [],
   "source": [
    "def backward(params, x, lr, grad, hiddens, layer_conf):\n",
    "    for i in range(len(params)):\n",
    "        if layer_conf[i+1][\"type\"] == \"rnn\":\n",
    "            [i_weight], [h_weight, h_bias], [o_weight, o_bias] = params[i]\n",
    "            hidden = hiddens[i]\n",
    "            next_h_grad = None\n",
    "            i_weight_grad, h_weight_grad, h_bias_grad, o_weight_grad, o_bias_grad = [0] * 5\n",
    "\n",
    "            for j in range(x.shape[0] - 1, -1, -1):\n",
    "                # 1,1\n",
    "                out_grad = grad[j,:][:,np.newaxis]\n",
    "\n",
    "                # Output updates\n",
    "                # (1,1 @ 1,3).T = 3,1\n",
    "                o_weight_grad += (out_grad @ hidden[j,:][np.newaxis, :]).T\n",
    "                # 1,1\n",
    "                o_bias_grad += out_grad\n",
    "\n",
    "                # Propagate gradient to hidden unit\n",
    "                # (3,1 @ 1,1).T = 1,3\n",
    "                ho_grad = (o_weight @ out_grad).T\n",
    "\n",
    "                if j < x.shape[0] - 1:\n",
    "                    tanh_deriv_next = 1 - hidden[j+1] ** 2\n",
    "                    # 1,3 @ 3,3 @ 3,3\n",
    "                    hh_grad = next_h_grad @ np.diag(tanh_deriv_next) @ h_weight\n",
    "                    h_grad = hh_grad + ho_grad\n",
    "                else:\n",
    "                    h_grad = ho_grad\n",
    "\n",
    "                next_h_grad = h_grad.copy()\n",
    "\n",
    "                tanh_deriv = 1 - hidden[j] ** 2\n",
    "                # 1,3.T @ (1,3 @ 3,3)\n",
    "                i_weight_grad += x[j,:][:,np.newaxis] @ (h_grad @ np.diag(tanh_deriv))\n",
    "\n",
    "                if j > 0:\n",
    "                    # (1,3 @ 3,3).T = 3,1 @ 1,3\n",
    "                    # Turn this to match the correct grad values with the associated weights\n",
    "                    h_weight_grad += ((h_grad @ np.diag(tanh_deriv)).T @ hidden[j-1][np.newaxis,:]).T\n",
    "                    # (1,3) @ 3,3\n",
    "                    h_bias_grad += h_grad @ np.diag(tanh_deriv)\n",
    "\n",
    "            i_weight -= i_weight_grad * lr\n",
    "            h_weight -= h_weight_grad * lr\n",
    "            h_bias -= h_bias_grad * lr\n",
    "            o_weight -= o_weight_grad * lr\n",
    "            o_bias -= o_bias_grad * lr\n",
    "            params[i] = [[i_weight], [h_weight, h_bias], [o_weight, o_bias]]\n",
    "    return params"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 328,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: 0 valid loss 89.86535111568139\n",
      "Epoch: 1 valid loss 74.38209988279957\n",
      "Epoch: 2 valid loss 46.68417143453307\n",
      "Epoch: 3 valid loss 35.27479367955674\n",
      "Epoch: 4 valid loss 31.616366595603132\n",
      "Epoch: 5 valid loss 30.51672686115938\n",
      "Epoch: 6 valid loss 29.53137483865339\n",
      "Epoch: 7 valid loss 28.4821828286481\n",
      "Epoch: 8 valid loss 27.915303477724308\n",
      "Epoch: 9 valid loss 27.7105958059442\n",
      "Epoch: 10 valid loss 27.798556917758656\n",
      "Epoch: 11 valid loss 28.69006176689853\n",
      "Epoch: 12 valid loss 29.77378069212401\n",
      "Epoch: 13 valid loss 30.006155465845147\n",
      "Epoch: 14 valid loss 29.912103897964553\n",
      "Epoch: 15 valid loss 29.96274696364353\n",
      "Epoch: 16 valid loss 30.119110347572366\n",
      "Epoch: 17 valid loss 30.310023343212126\n",
      "Epoch: 18 valid loss 30.495203347830774\n",
      "Epoch: 19 valid loss 30.656271606191556\n"
     ]
    },
    {
     "ename": "KeyboardInterrupt",
     "evalue": "",
     "output_type": "error",
     "traceback": [
      "\u001B[0;31m---------------------------------------------------------------------------\u001B[0m",
      "\u001B[0;31mKeyboardInterrupt\u001B[0m                         Traceback (most recent call last)",
      "Cell \u001B[0;32mIn[328], line 18\u001B[0m\n\u001B[1;32m     16\u001B[0m     hiddens, outputs \u001B[38;5;241m=\u001B[39m forward(params, seq_x, layer_conf)\n\u001B[1;32m     17\u001B[0m     grad \u001B[38;5;241m=\u001B[39m mse_grad(seq_y, outputs[\u001B[38;5;241m0\u001B[39m])\n\u001B[0;32m---> 18\u001B[0m     params \u001B[38;5;241m=\u001B[39m \u001B[43mbackward\u001B[49m\u001B[43m(\u001B[49m\u001B[43mparams\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mseq_x\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mlr\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mgrad\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mhiddens\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mlayer_conf\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m     20\u001B[0m sequence_len \u001B[38;5;241m=\u001B[39m \u001B[38;5;241m7\u001B[39m\n\u001B[1;32m     21\u001B[0m losses \u001B[38;5;241m=\u001B[39m []\n",
      "Cell \u001B[0;32mIn[325], line 40\u001B[0m, in \u001B[0;36mbackward\u001B[0;34m(params, x, lr, grad, hiddens, layer_conf)\u001B[0m\n\u001B[1;32m     35\u001B[0m i_weight_grad \u001B[38;5;241m+\u001B[39m\u001B[38;5;241m=\u001B[39m x[j,:][:,np\u001B[38;5;241m.\u001B[39mnewaxis] \u001B[38;5;241m@\u001B[39m (h_grad \u001B[38;5;241m@\u001B[39m np\u001B[38;5;241m.\u001B[39mdiag(tanh_deriv))\n\u001B[1;32m     37\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m j \u001B[38;5;241m>\u001B[39m \u001B[38;5;241m0\u001B[39m:\n\u001B[1;32m     38\u001B[0m     \u001B[38;5;66;03m# (1,3 @ 3,3).T = 3,1 @ 1,3\u001B[39;00m\n\u001B[1;32m     39\u001B[0m     \u001B[38;5;66;03m# Turn this to match the correct grad values with the associated weights\u001B[39;00m\n\u001B[0;32m---> 40\u001B[0m     h_weight_grad \u001B[38;5;241m+\u001B[39m\u001B[38;5;241m=\u001B[39m ((h_grad \u001B[38;5;241m@\u001B[39m \u001B[43mnp\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mdiag\u001B[49m\u001B[43m(\u001B[49m\u001B[43mtanh_deriv\u001B[49m\u001B[43m)\u001B[49m)\u001B[38;5;241m.\u001B[39mT \u001B[38;5;241m@\u001B[39m hidden[j\u001B[38;5;241m-\u001B[39m\u001B[38;5;241m1\u001B[39m][np\u001B[38;5;241m.\u001B[39mnewaxis,:])\u001B[38;5;241m.\u001B[39mT\n\u001B[1;32m     41\u001B[0m     \u001B[38;5;66;03m# (1,3) @ 3,3\u001B[39;00m\n\u001B[1;32m     42\u001B[0m     h_bias_grad \u001B[38;5;241m+\u001B[39m\u001B[38;5;241m=\u001B[39m h_grad \u001B[38;5;241m@\u001B[39m np\u001B[38;5;241m.\u001B[39mdiag(tanh_deriv)\n",
      "File \u001B[0;32m<__array_function__ internals>:180\u001B[0m, in \u001B[0;36mdiag\u001B[0;34m(*args, **kwargs)\u001B[0m\n",
      "File \u001B[0;32m~/.virtualenvs/nnets/lib/python3.10/site-packages/numpy/lib/twodim_base.py:236\u001B[0m, in \u001B[0;36m_diag_dispatcher\u001B[0;34m(v, k)\u001B[0m\n\u001B[1;32m    228\u001B[0m     \u001B[38;5;28;01mreturn\u001B[39;00m m\n\u001B[1;32m    231\u001B[0m _eye_with_like \u001B[38;5;241m=\u001B[39m array_function_dispatch(\n\u001B[1;32m    232\u001B[0m     _eye_dispatcher\n\u001B[1;32m    233\u001B[0m )(eye)\n\u001B[0;32m--> 236\u001B[0m \u001B[38;5;28;01mdef\u001B[39;00m \u001B[38;5;21m_diag_dispatcher\u001B[39m(v, k\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mNone\u001B[39;00m):\n\u001B[1;32m    237\u001B[0m     \u001B[38;5;28;01mreturn\u001B[39;00m (v,)\n\u001B[1;32m    240\u001B[0m \u001B[38;5;129m@array_function_dispatch\u001B[39m(_diag_dispatcher)\n\u001B[1;32m    241\u001B[0m \u001B[38;5;28;01mdef\u001B[39;00m \u001B[38;5;21mdiag\u001B[39m(v, k\u001B[38;5;241m=\u001B[39m\u001B[38;5;241m0\u001B[39m):\n",
      "\u001B[0;31mKeyboardInterrupt\u001B[0m: "
     ]
    }
   ],
   "source": [
    "epochs = 50\n",
    "lr = 1e-5\n",
    "\n",
    "\n",
    "layer_conf = [\n",
    "    {\"type\":\"input\", \"units\": 3},\n",
    "    {\"type\": \"rnn\", \"hidden\": 4, \"output\": 1}\n",
    "]\n",
    "params = init_params(layer_conf)\n",
    "\n",
    "for i in range(epochs):\n",
    "    sequence_len = 7\n",
    "    for j in range(train_x.shape[0] - sequence_len):\n",
    "        seq_x = train_x[j:(j+sequence_len),]\n",
    "        seq_y = train_y[j:(j+sequence_len),]\n",
    "        hiddens, outputs = forward(params, seq_x, layer_conf)\n",
    "        grad = mse_grad(seq_y, outputs[0])\n",
    "        params = backward(params, seq_x, lr, grad, hiddens, layer_conf)\n",
    "\n",
    "    sequence_len = 7\n",
    "    losses = []\n",
    "    for j in range(valid_x.shape[0] - sequence_len):\n",
    "        seq_x = valid_x[j:(j+sequence_len),]\n",
    "        seq_y = valid_y[j:(j+sequence_len),]\n",
    "        _, outputs = forward(params, seq_x, layer_conf)\n",
    "        losses.append(mse(seq_y, outputs[0]))\n",
    "\n",
    "    print(f\"Epoch: {i} valid loss {mean(losses)}\")"
   ],
   "metadata": {
    "collapsed": false
   }
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
